### ### ### ### ### ### ### ### ### 
#
#
# Spillover Effects in International Law: Evidence from Tax Planning ####
# (International Studies Quarterly, 202X)
# Calvin Thrall
#
# Replication code
#
### ### ### ### ### ### ### ### ### 

# all analyses conducted on a Mac Studio with M2 Max Chip, 
# MacOS Ventura 13.5, with R version 4.4.1 'Race for Your Life'

# PACKAGES ####
library(tidyverse) # v2.0.0 — many data cleaning/manipulation tools
library(skimr) # v2.1.5 — data summaries
library(xtable) # v1.8-4 — producing tables
library(modelsummary) # v2.1.1 — producing tables
options(modelsummary_factory_latex = 'kableExtra')
options("modelsummary_format_numeric_latex" = "plain")
library(interflex) # v1.2.6 — estimate nonlinear heterogeneous efx
library(fixest) # v0.12.1 — fast regressions
library(data.table) # v1.15.4 — function to read large datasets efficiently

# DATA ####

# data for selection models — SEC sample

selection_sec <- read_csv("selection_data_sec_sample.csv")

# data for selection models - Amadeus sample

selection_ama <- read_csv("selection_data_amadeus_sample.csv")

# data for conduit location models - ISDS sample

cl_isds <- read_csv("conduit_location_data_isds_sample.csv")

# data for conduit location models - SEC sample

cl_sec <- read_csv("conduit_location_data_sec_sample.csv")

# data for conduit location models - Amadeus sample

cl_ama <- fread("conduit_location_data_amadeus_sample.csv")

# data for figure 3 

fig_3_data <- read_csv("data_for_figure_3.csv")

# ISDS case-level data for in-text statistics

isds_case_data <- read_csv("isds_case_data_replication.csv")

# BITs/BTTs data from 2007, also for in-text statistic 
# originally from replication data of Barthel and Neumayer (2012).

bits_btts_correlation_data <- read_csv("bits_btts_correlation_data.csv")


# MAIN TEXT——————————————————————————————————####


# DESCRIPTIVE FIGURES ####


# —FIGURE 3: DISTRIBUTIONS OF PROXY CLAIMANT/PARENT NATIONALITIES ####

fig_3 <- fig_3_data %>% 
ggplot(aes(x = fct_reorder(Country, amount), y = amount,
           fill = `# of times served as a...`)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = abs) +
  scale_fill_manual(values = c("black", "darkgrey")) +
  coord_flip() +
  labs(x = "", y = "") +
  theme_bw() +
  theme(legend.position = "bottom")

ggsave("dist_change_plot.eps", fig_3,
       width = 6.5, height = 8, units = "in")  

# —FIGURE 7: CONDUIT SUBSIDIARIES BY TREATY ACCESS ####


twobytwo_plot <- cl_sec %>%
  filter(chosen == 1) %>%
  rename(conduit_host_BIT = BIT_lag_10,
         conduit_host_BTT = BTT_lag_10_conduithost) %>% 
  filter(chosen == 1) %>% 
  select(conduit_host_BIT, conduit_host_BTT) %>% 
  bind_rows(cl_ama %>% 
              filter(chosen == 1) %>% 
              select(conduit_host_BIT, conduit_host_BTT)) %>% 
  count(conduit_host_BTT,conduit_host_BIT) %>% 
  mutate(conduit_host_BIT = if_else(conduit_host_BIT == 1,
                                    "BIT w/Host State", "No BIT w/Host State"),
         conduit_host_BTT = if_else(conduit_host_BTT == 1,
                                    "BTT w/Host State", "No BTT w/Host State")) %>%
  ggplot(aes(y = n, x = conduit_host_BIT, fill = conduit_host_BTT)) +
  geom_col() +
  labs(fill = "", x = "", y = "Conduit subsidiaries (count)") +
  theme_minimal() +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 16),
        legend.position = "inside",
        legend.position.inside = c(.25, .6)
        ) +
  scale_fill_manual(values = c("black", "darkgrey"))

ggsave("twobytwo_plot.eps", twobytwo_plot,
       width = 6.5, height = 5, units = "in")


# REGRESSIONS + RELATED TABLES AND FIGURES ####


# —TABLE 2: SELECTION INTO INDIRECT INVESTMENT ####

# ——REGRESSIONS ####
 
selection_sec_1 <- feols(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                         Roy_Mean_hosthome + BIT_lag_10, cluster = ~ top_parent_id_parent,
                       data = selection_sec)

selection_sec_2 <- feols(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                              Roy_Mean_hosthome + BIT_lag_10 + 
                              log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                              political_risk_1997, cluster = ~ top_parent_id_parent,
                            data = selection_sec)

selection_sec_3 <- feols(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                          Roy_Mean_hosthome + BIT_lag_10 + 
                          log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                          political_risk_1997 | top_parent_id_parent, cluster = ~ top_parent_id_parent,
                        data = selection_sec)

selection_ama_1 <- feols(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                             Roy_Mean_direct + home_host_BIT, cluster = ~ GUO_NAME,
                           data = selection_ama)

selection_ama_2 <- feols(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                  Roy_Mean_direct + home_host_BIT + 
                                  log_gdppc_host + v2x_api_VDEM_host +
                                  political_risk_host, cluster = ~ GUO_NAME,
                                data = selection_ama)

selection_ama_3 <- feols(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                              Roy_Mean_direct + home_host_BIT + 
                              log_gdppc_host + v2x_api_VDEM_host +
                              political_risk_host | GUO_NAME + DATEINC_year,
                            cluster = ~ GUO_NAME,
                            data = selection_ama)

# ——TABLE ####

modelsummary(models = list(selection_sec_1, selection_sec_2,
                           selection_sec_3, selection_ama_1,
                           selection_ama_2, selection_ama_3),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_hosthome" = "Tax Treaty (home-host)",
                          "home_host_BTT" = "Tax Treaty (home-host)",
                          "Div_Mean_hosthome" = "Withholding tax (Dividends)",
                          "Div_Mean_direct" = "Withholding tax (Dividends)",
                          "Int_Mean_hosthome" = "Withholding tax (Interest)",
                          "Int_Mean_direct" = "Withholding tax (Interest)",
                          "Roy_Mean_hosthome" = "Withholding tax (Royalties)",
                          "Roy_Mean_direct" = "Withholding tax (Royalties)",
                          "BIT_lag_10" = "BIT (home-host)",
                          "home_host_BIT" = "BIT (home-host)"))

# —FIGURE 4: EFFECT OF BIT COVERAGE ON SELECTION INTO INDIRECT INVESTMENT BY HOST STATE POLITICAL RISK ####

# ——SEC SAMPLE ####

set.seed(51)

binning_BIT_selection <- interflex(estimator = "binning", Y = "indirect",
                                   D = "BIT_lag_10", X = "political_risk_1997",
                                   Z = c("BTT_lag_10_hosthome", "Div_Mean_hosthome",
                                         "Int_Mean_hosthome", "Roy_Mean_hosthome",
                                         "log_gdppc_10", "v2x_api_VDEM_1997"),
                                   data = as.data.frame(selection_sec),
                                   FE = "top_parent_id_parent",
                                   full.moderate = T,
                                   cutoffs = c(-1, 0),
                                   vcov.type = "cluster",
                                   cl= "top_parent_id_parent",
                                   na.rm = T)

binning_BIT_selection_plot <- plot(binning_BIT_selection,
                                   ylab = "Marginal effect of Home-Host BIT",
                                   xlab = "Political risk in host state",
                                   theme.bw = T)

ggsave("binning_BIT_selection_plot.jpg", binning_BIT_selection_plot,
       width = 6, height = 6, units = "in")

# ——AMADEUS SAMPLE ####

set.seed(51)

binning_BIT_selection_ama <- interflex(estimator = "binning", Y = "indirect_dummy",
                                       D = "home_host_BIT", X = "political_risk_host",
                                       Z = c("home_host_BTT", "Div_Mean_direct",
                                             "Int_Mean_direct", "Roy_Mean_direct",
                                             "log_gdppc_host", "v2x_api_VDEM_host"),
                                       data = as.data.frame(selection_ama),
                                       FE = c("GUO_NAME", "DATEINC_year"),
                                       full.moderate = T,
                                       vcov.type = "cluster",
                                       cl= "GUO_NAME",
                                       na.rm = T)

binning_BIT_selection_plot_ama <- plot(binning_BIT_selection_ama,
                                       ylab = "Marginal effect of Home-Host BIT",
                                       xlab = "Political risk in host state",
                                       theme.bw = T)

ggsave("binning_BIT_selection_plot_ama.jpg", binning_BIT_selection_plot_ama,
       width = 6, height = 6, units = "in")


# —TABLE 4: CONDUIT LOCATION, ISDS SAMPLE ####

# ——REGRESSIONS ####
# (plus calculations of mean(Y))


cl_isds_tax_only <- feols(chosen ~ conduit_CIT_lag10 +
                           dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                           Int_effective_Alt_lag10 + Div_effective_Alt_lag10 + netherlands,
                         data = cl_isds,
                         cluster = ~ case)

cl_isds %>% filter(is.na(conduit_CIT_lag10) == F &
                                  is.na(dtt_conduit_parent_lag10) == F &
                                  is.na(dtt_host_conduit_lag10) == F &
                                  is.na(Int_effective_Alt_lag10) == F &
                                  is.na(Div_effective_Alt_lag10) == F) %>%
  select(chosen) %>%
  unlist() %>%
  mean()

cl_isds_bit_only <- feols(chosen ~ conduit_host_BIT + netherlands,
                         data = cl_isds,
                         cluster = ~ case)

cl_isds %>% filter(is.na(conduit_host_BIT) == F) %>%
  select(chosen) %>%
  unlist() %>%
  mean()

cl_isds_controls_year <- feols(chosen ~ conduit_CIT_lag10 +
                                dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                                conduit_host_BIT +
                                log(gdppc_lag10_conduit) + netherlands | year,
                              cluster = ~ case,
                              data = cl_isds)


cl_isds_controls_full <- feols(chosen ~ conduit_CIT_lag10 +
                                dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                                conduit_host_BIT +  log(gdppc_lag10_conduit) +
                                netherlands | year + case,
                              cluster = ~ case,
                              data = cl_isds
)


cl_isds %>% filter(is.na(conduit_CIT_lag10) == F &
                     is.na(dtt_conduit_parent_lag10) == F &
                     is.na(dtt_host_conduit_lag10) == F &
                     is.na(Int_effective_Alt_lag10) == F &
                     is.na(Div_effective_Alt_lag10) == F &
                     is.na(conduit_host_BIT) == F &
                     is.na(gdppc_lag10_conduit) == F) %>%
  select(chosen) %>%
  unlist() %>%
  mean()

# note: since the only difference between 'controls_year' and 'controls_full" models
# is the inclusion of case FE in the latter, and there are no missing values of the
# case variable, the above value of mean(Y) applies to both models.

# ——TABLE ####

modelsummary(list(cl_isds_tax_only, cl_isds_bit_only,
                  cl_isds_controls_year, cl_isds_controls_full),
             output = "latex",
             stars = T,
             coef_map = c("conduit_CIT_lag10" = "Corporate income tax rate",
                          "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
                          "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
                          "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
                          "Int_effective_Alt_lag10" = "Withholding tax (interest)",
                          "conduit_host_BIT" = "BIT (w/host)"))

# —TABLE 5: CONDUIT LOCATION, SEC + AMADEUS SAMPLES ####

# ——REGRESSIONS ####

# SEC sample

cl_sec_taxonly <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec %>% 
  filter(is.na(CIT_lag_10) == F &
           is.na(BTT_lag_10_conduithost) == F &
           is.na(BTT_lag_10_parentconduit) == F &
           is.na(Div_effective) == F &
           is.na(Int_effective) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

cl_sec_bitonly <- feols(
  chosen ~ BIT_lag_10,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec %>% 
  filter(is.na(BIT_lag_10) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

cl_sec_controls <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1),
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec %>% 
  filter(is.na(CIT_lag_10) == F &
           is.na(BTT_lag_10_conduithost) == F &
           is.na(BTT_lag_10_parentconduit) == F &
           is.na(Div_effective) == F &
           is.na(Int_effective) == F &
           is.na(gdppc_WDI_1997) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

cl_sec_full <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec %>% 
  filter(is.na(CIT_lag_10) == F &
           is.na(BTT_lag_10_conduithost) == F &
           is.na(BTT_lag_10_parentconduit) == F &
           is.na(Div_effective) == F &
           is.na(Int_effective) == F &
           is.na(netherlands) == F &
           is.na(top_parent_id) == F &
           is.na(gdppc_WDI_1997) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

# Amadeus sample 


cl_ama_taxonly <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama %>% 
  filter(is.na(potential_conduit_CIT) == F &
           is.na(conduit_host_BTT) == F &
           is.na(parent_conduit_BTT) == F &
           is.na(Div_effective) == F &
           is.na(Int_effective) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

cl_ama_bitonly <- feols(
  chosen ~ conduit_host_BIT,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama %>% 
  filter(is.na(conduit_host_BIT) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

cl_ama_controls <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands |
    DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)


cl_ama_full <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)


cl_ama %>% 
  filter(is.na(potential_conduit_CIT) == F &
           is.na(conduit_host_BTT) == F &
           is.na(parent_conduit_BTT) == F &
           is.na(Div_effective) == F &
           is.na(Int_effective) == F &
           is.na(gdppc_potentialconduit) == F) %>% 
  select(chosen) %>% 
  unlist() %>% 
  mean()

# note: since the only difference between cl_ama_controls and cl_ama_full
# is the latter's inclusion of parent firm FE, and there is no missingness
# in the parent firm variable, the value of mean(Y) will be the same for 
# both samples.

# ——TABLE ####


modelsummary(list(cl_sec_taxonly, cl_sec_bitonly,
                  cl_sec_controls, cl_sec_full,
                  cl_ama_taxonly, cl_ama_bitonly,
                  cl_ama_controls, cl_ama_full),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "BIT_lag_10" = "BIT (w/host)",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "potential_conduit_CIT" = "Corporate income tax rate",
                          "conduit_host_BIT" = "BIT (w/host)"))

# —FIGURE 5: CONDUIT LOCATION MODEL COMPARISONS ####


cl_all_plot <- modelplot(list("ISDS" = cl_isds_controls_full,
                              "SEC" = cl_sec_full,
                              "Amadeus" = cl_ama_full),
                         coef_map = c("BTT_lag_10_conduithost" = "Tax Treaty (w/host)",
                                      "BTT_lag_10_parentconduit" = "Tax Treaty (w/home)",
                                      "Div_effective" = "Withholding tax (Dividends)",
                                      "Int_effective" = "Withholding tax (Interest)",
                                      "CIT_lag_10" = "Corporate Income Tax Rate",
                                      "BIT_lag_10" = "BIT (w/host)",
                                      "dtt_conduit_parent_lag10" = "Tax Treaty (w/home)",
                                      "dtt_host_conduit_lag10" = "Tax Treaty (w/host)",
                                      "Int_effective_Alt_lag10" = "Withholding tax (Interest)",
                                      "Div_effective_Alt_lag10" = "Withholding tax (Dividends)",
                                      "conduit_CIT_lag10" = "Corporate Income Tax Rate",
                                      "conduit_host_BIT" = "BIT (w/host)",
                                      "parent_conduit_BTT" = "Tax Treaty (w/home)",
                                      "conduit_host_BTT" = "Tax Treaty (w/host)",
                                      "potential_conduit_CIT" = "Corporate Income Tax Rate"),
                         draw = F)  %>% 
  mutate(estimate = if_else(
    grepl("Withholding", term) == T | grepl("Income", term) == T,
    estimate/10, estimate
  ),
  conf.low = if_else(
    grepl("Withholding", term) == T | grepl("Income", term) == T,
    conf.low/10, conf.low
  ),
  conf.high = if_else(
    grepl("Withholding", term) == T | grepl("Income", term) == T,
    conf.high/10, conf.high
  )) %>%  
  mutate(model = factor(model, levels = c("ISDS",
                                          "SEC",
                                          "Amadeus")),
         term = factor(term, levels = c("BIT (w/host)",
                                        "Corporate Income Tax Rate",
                                        "Withholding tax (Interest)",
                                        "Withholding tax (Dividends)",
                                        "Tax Treaty (w/home)",
                                        "Tax Treaty (w/host)"
                                        ))) %>% 
  ggplot(aes(y = term, x = estimate, shape = model)) +
  geom_point(position = position_dodge(width = .75),
             size = 2) +
  geom_linerange(aes(xmin = conf.low, xmax = conf.high),
                 position = position_dodge(width = .75)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .75) +
  geom_hline(yintercept = 1.5, alpha = .6) +
  geom_hline(yintercept = 2.5, alpha = .6) +
  geom_hline(yintercept = 3.5, alpha = .6) +
  geom_hline(yintercept = 4.5, alpha = .6) +
  geom_hline(yintercept = 5.5, alpha = .6) +
  scale_shape_manual(values = c(17, 13, 19),
                     guide = guide_legend(reverse = T)) +
  theme_bw() +
  labs(x = "", y = "", color = "", shape = "Sample") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12))

ggsave("cl_all_plot.jpg", cl_all_plot,
       width = 6.5, height = 4.5, units = "in")


# —FIGURE 6 ####

# ——ISDS SAMPLE ####

set.seed(51)

binning_isds <- interflex(estimator = "binning", Y = "chosen",
                         D = "conduit_host_BIT", X = "political_risk_lag10",
                         Z = c("conduit_CIT_lag10", "dtt_conduit_parent_lag10",
                               "dtt_host_conduit_lag10", "Int_effective_Alt_lag10",
                               "Div_effective_Alt_lag10", "netherlands",
                               "log_gddppc_lag10_conduit"),
                         data = as.data.frame(cl_isds),
                         FE = "year",
                         full.moderate = T,
                         vcov.type = "cluster",
                         cl= "case",
                         na.rm = T)

binning_isds_plot <- plot(binning_isds,
                         ylab = "Marginal effect of BIT on conduit location",
                         xlab = "Political risk in host state",
                         theme.bw = T)

ggsave("binning_isds_plot.jpg", binning_isds_plot, width = 6, height = 6,
       units = "in")

# ——SEC SAMPLE ####


set.seed(51)

binning_sec <- interflex(estimator = "binning", Y = "chosen",
                             D = "BIT_lag_10", X = "political_risk_1997",
                             Z = c("CIT_lag_10", "BTT_lag_10_conduithost",
                                   "BTT_lag_10_parentconduit", "Div_effective", "netherlands",
                                   "Int_effective", "log_gdppc_10"),
                             data = as.data.frame(cl_sec),
                             FE = "top_parent_id",
                             full.moderate = T,
                             vcov.type = "cluster",
                             cl= "top_parent_id",
                             na.rm = T)

binning_sec_plot <- plot(binning_sec,
                             ylab = "Marginal effect of BIT on conduit location",
                             xlab = "Political risk in host state",
                             theme.bw = T)

ggsave("binning_sec_plot.jpg", binning_sec_plot, width = 6, height = 6,
       units = "in")


# ——AMADEUS SAMPLE ####

set.seed(51)

# the below interflex call takes approx 20 seconds to run

binning_ama <- interflex(estimator = "binning", Y = "chosen",
                             D = "conduit_host_BIT", X = "political_risk_host",
                             Z = c("potential_conduit_CIT", "conduit_host_BTT",
                                   "parent_conduit_BTT", "Div_effective", "netherlands",
                                   "Int_effective", "gdppc_potentialconduit_log"),
                             data = as.data.frame(cl_ama),
                             FE = c("GUO_NAME", "DATEINC_year"),
                             full.moderate = T,
                             vcov.type = "cluster",
                             cl= "GUO_NAME",
                             na.rm = T)

binning_ama_plot <- plot(binning_ama,
                             ylab = "Marginal effect of BIT on conduit location",
                             xlab = "Political risk in host state",
                             theme.bw = T)

ggsave("binning_ama_plot.jpg", binning_ama_plot,
       width = 6, height = 6, units = "in")

# STATISTICS MENTIONED IN TEXT ####

# —P1: "These cases are associated with the supermajority (71%) of all damages awarded in the regime ($71.6B)." ####

amount_by_proxy <- isds_case_data %>% 
  select(amount_awarded, proxy_dummy) %>% 
  separate(col = amount_awarded,
           into = c("amountaward1", "amountaward2"),
           sep = "\\(", remove = F) %>% 
  mutate(amountaward = case_when(
    grepl("USD", amountaward1) == T ~
      amountaward1 %>% 
      str_remove_all("[[:alpha:]]") %>% 
      as.numeric(),
    grepl("USD", amountaward2) == T ~
      amountaward2 %>% 
      str_remove_all("[[:alpha:]]") %>% 
      str_remove_all("\\)") %>% 
      as.numeric(),
    T ~ NA_real_
  )) %>% 
  group_by(proxy_dummy) %>% 
  summarize(amountaward = sum(amountaward, na.rm = T),
            n = n())

# total award amount for proxy arbitrations (n=194) = $71,613,090,000

amount_by_proxy[[2,2]]/sum(amount_by_proxy$amountaward)

# 71.2%


# —P2: "56% of the dyads that have an active BTT also have an active BIT" ####

bits_btts_correlation_data %>% 
  filter(btt == 1) %>% 
  select(bit) %>% 
  unlist() %>% 
  mean()

# —P3: "I introduce a new dataset on the corporate ownership structures of over 1,000 claimant firms..." ####
# and P15: "I collected this information for over 1,000 claimants..."

isds_case_data %>% 
  select(Company1:Company5) %>% 
  pivot_longer(cols = everything(),
               names_to = "company_no",
               values_to = "company") %>% 
  filter(is.na(company) == F) %>% 
  nrow()

# APPENDIX——————————————————————————————————####

# —TABLE A.1: SUMMARY STATISTICS ####
# note: i manually split this table into each sample segment.

                                              # SEC SAMPLE
cl_sec %>% 
  select("BTT w/host" = BTT_lag_10_conduithost,
         "BTT w/home" = BTT_lag_10_parentconduit,
         "Withholding tax rate (Dividends)" = Div_effective,
         "Withholding tax rate (Interest)" = Int_effective,
         "Withholding tax rate (Royalties)" = Roy_effective,
         "Corporate income tax rate" = CIT_lag_10,
         "BIT w/host" = BIT_lag_10,
         "Political risk" = political_risk_1997,
         "GDPP per cap" = gdppc_WDI_1997) %>%
  skim() %>% 
  tibble() %>% 
  select(var = skim_variable,
         mean = numeric.mean,
         sd = numeric.sd) %>% 
  bind_cols(
    cl_sec %>% 
      filter(chosen == 1) %>% 
      select("BTT w/host" = BTT_lag_10_conduithost,
             "BTT w/home" = BTT_lag_10_parentconduit,
             "Withholding tax rate (Dividends)" = Div_effective,
             "Withholding tax rate (Interest)" = Int_effective,
             "Withholding tax rate (Royalties)" = Roy_effective,
             "Corporate income tax rate" = CIT_lag_10,
             "BIT w/host" = BIT_lag_10,
             "Political risk" = political_risk_1997,
             "GDPP per cap" = gdppc_WDI_1997) %>%
      skim() %>%
      tibble() %>% 
      select(mean_chose = numeric.mean,
             sd_chose = numeric.sd)
  ) %>%                                 # ISDS SAMPLE
  bind_rows(
    cl_isds %>% 
      select("BTT w/host" = dtt_host_conduit_lag10,
             "BTT w/home" = dtt_conduit_parent_lag10,
             "Withholding tax rate (Dividends)" = Div_effective_Alt_lag10,
             "Withholding tax rate (Interest)" = Int_effective_Alt_lag10,
             "Withholding tax rate (Royalties)" = Roy_effective_Alt_lag10,
             "Corporate income tax rate" = conduit_CIT_lag10,
             "BIT w/host" = conduit_host_BIT,
             "Political risk" = political_risk_lag10,
             "GDPP per cap" = gdppc_lag10_conduit) %>%
      skim() %>%
      tibble() %>% 
      select(var = skim_variable,
             mean = numeric.mean,
             sd = numeric.sd) %>% 
      bind_cols(
        cl_isds %>% 
          filter(chosen == 1) %>% 
          select("BTT w/host" = dtt_host_conduit_lag10,
                 "BTT w/home" = dtt_conduit_parent_lag10,
                 "Withholding tax rate (Dividends)" = Div_effective_Alt_lag10,
                 "Withholding tax rate (Interest)" = Int_effective_Alt_lag10,
                 "Withholding tax rate (Royalties)" = Roy_effective_Alt_lag10,
                 "Corporate income tax rate" = conduit_CIT_lag10,
                 "BIT w/host" = conduit_host_BIT,
                 "Political risk" = political_risk_lag10,
                 "GDPP per cap" = gdppc_lag10_conduit) %>%
          skim() %>%
          tibble() %>% 
          select(mean_chose = numeric.mean,
                 sd_chose = numeric.sd)
      )
  ) %>% 
  bind_rows(                              # AMADEUS SAMPLE
    cl_ama %>% 
      select("BTT w/host" = conduit_host_BTT,
             "BTT w/home" = parent_conduit_BTT,
             "Withholding tax rate (Dividends)" = Div_effective,
             "Withholding tax rate (Interest)" = Int_effective,
             "Withholding tax rate (Royalties)" = Roy_effective,
             "Corporate income tax rate" = potential_conduit_CIT,
             "BIT w/host" = conduit_host_BIT,
             "Political risk" = political_risk_host,
             "GDPP per cap" = gdppc_potentialconduit) %>%
      skim() %>%
      tibble() %>% 
      select(var = skim_variable,
             mean = numeric.mean,
             sd = numeric.sd) %>% 
      bind_cols(
        cl_ama %>% 
          filter(chosen == 1) %>% 
          select("BTT w/host" = conduit_host_BTT,
                 "BTT w/home" = parent_conduit_BTT,
                 "Withholding tax rate (Dividends)" = Div_effective,
                 "Withholding tax rate (Interest)" = Int_effective,
                 "Withholding tax rate (Royalties)" = Roy_effective,
                 "Corporate income tax rate" = potential_conduit_CIT,
                 "BIT w/host" = conduit_host_BIT,
                 "Political risk" = political_risk_host,
                 "GDPP per cap" = gdppc_potentialconduit) %>%
          skim() %>%
          tibble() %>% 
          select(mean_chose = numeric.mean,
                 sd_chose = numeric.sd)
      )
  ) %>% 
  xtable() %>% 
  print.xtable(include.rownames = F)

# —TABLE A.2: FIRMS IN SEC SAMPLE ####

cl_sec %>% 
  select(company_name) %>% 
  unique() %>% 
  arrange(company_name) %>% 
  mutate(company_name = str_to_upper(company_name)) %>% 
  slice(1:32) %>% 
  bind_cols(
    cl_sec %>% 
      select(company_name) %>% 
      unique() %>% 
      arrange(company_name) %>% 
      mutate(company_name_2 = str_to_upper(company_name)) %>% 
      select(-company_name) %>% 
      slice(33:64)
  ) %>% 
  xtable() %>% 
  print.xtable(include.rownames = F,
               include.colnames = F)

# —TABLE A.3/A.4: INDUSTRIES IN SEC AND AMADEUS SAMPLES ####

cl_sec %>% 
  select(company_name, industry) %>% 
  unique() %>% 
  group_by(industry) %>% 
  summarize(n = n()) %>% 
  mutate(industry = industry %>% 
           replace_na("Missing")) %>% 
  arrange(desc(n)) %>% 
  xtable() %>% 
  print.xtable(include.rownames = F)

cl_ama %>% 
  select(GUO_NAME, industry_parent) %>% 
  unique() %>% 
  group_by(industry_parent) %>% 
  summarize(n = n()) %>% 
  mutate(industry_parent = industry_parent %>% 
           replace_na("Missing")) %>% 
  arrange(desc(n)) %>% 
  xtable() %>% 
  print.xtable(include.rownames = F)

# —TABLE B.1: FULL MODEL RESULTS, SELECTION MODELS ####


modelsummary(models = list(selection_sec_1, selection_sec_2,
                           selection_sec_3, selection_ama_1,
                           selection_ama_2,
                           selection_ama_3),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_hosthome" = "Tax Treaty (home-host)",
                          "home_host_BTT" = "Tax Treaty (home-host)",
                          "Div_Mean_hosthome" = "Withholding tax (Dividends)",
                          "Div_Mean_direct" = "Withholding tax (Dividends)",
                          "Int_Mean_hosthome" = "Withholding tax (Interest)",
                          "Int_Mean_direct" = "Withholding tax (Interest)",
                          "Roy_Mean_hosthome" = "Withholding tax (Royalties)",
                          "Roy_Mean_direct" = "Withholding tax (Royalties)",
                          "BIT_lag_10" = "BIT (home-host)",
                          "home_host_BIT" = "BIT (home-host)",
                          "log(gdppc_WDI_1997 + 1)" = "GDP per cap (host, log)",
                          "log_gdppc_host" = "GDP per cap (host, log)",
                          "v2x_api_VDEM_host" = "Polyarchy Index (host)",
                          "v2x_api_VDEM_1997" = "Polyarchy Index (host)", 
                          "political_risk_host" = "Political Risk (host)",
                          "political_risk_1997" = "Political Risk (host)"
             ))

# —TABLE B.2: REGRESSION TABLE ACCOMPANYING FIGURE 4 ####

# SEC sample

binning_select_coefs <- rownames(binning_BIT_selection$model.linear$coefficients) %>% 
  bind_cols(binning_BIT_selection$model.linear$coefficients,
            binning_BIT_selection$model.linear$cse,
            binning_BIT_selection$model.linear$cpval) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  mutate(term = term %>% 
           str_replace("\\.X", " * Political Risk")) %>% 
  filter(grepl("Group", term) == F)

binning_select_bins <- rownames(binning_BIT_selection$model.binning$coefficients) %>% 
  bind_cols(
    binning_BIT_selection$model.binning$coefficients,
    binning_BIT_selection$model.binning$cse,
    binning_BIT_selection$model.binning$cpval
  ) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  filter(term %in% 
           c("D.Group.2.G.1", "D.Group.2.G.2",
             "D.Group.2.G.3"))

binning_select <- bind_rows(binning_select_coefs, binning_select_bins)

binning_select <- binning_select %>% 
  mutate(term = case_when(
    term == "political_risk_1997" ~ "Host Political Risk",
    term == "BTT_lag_10_hosthome" ~ "Tax treaty (home-host)",
    term == "Div_Mean_hosthome" ~ "Withholding tax (Dividends)",
    term == "Int_Mean_hosthome" ~ "Withholding tax (Interest)",
    term == "Roy_Mean_hosthome" ~ "Withholding tax (Royalties)",
    term == "BTT_lag_10_hosthome * Political Risk" ~
      "Tax treaty (home-host) * Risk",
    term == "Div_Mean_hosthome * Political Risk" ~ 
      "Withholding tax (D) * Risk",
    term == "Int_Mean_hosthome * Political Risk" ~ 
      "Withholding tax (I) * Risk",
    term == "Roy_Mean_hosthome * Political Risk" ~
      "Withholding tax (R) * Risk",
    term == "D.Group.2.G.1" ~ "Home-Host BIT (low risk)",
    term == "D.Group.2.G.2" ~ "Home-Host BIT (medium risk)",
    term == "D.Group.2.G.3" ~ "Home-Host BIT (high risk)",
    term == "log_gdppc_10" ~ "GDP per cap (host, log)",
    term == "log_gdppc_10 * Political Risk" ~ "GDP per cap * Risk",
    term == "v2x_api_VDEM_1997" ~ "Polyarchy (host)",
    term == "v2x_api_VDEM_1997 * Political Risk" ~ "Polyarchy * Risk",
    T ~ term
  )) 

# selection into indirect, Amadeus

binning_select_ama_coefs <- rownames(binning_BIT_selection_ama$model.linear$coefficients) %>% 
  bind_cols(binning_BIT_selection_ama$model.linear$coefficients,
            binning_BIT_selection_ama$model.linear$cse,
            binning_BIT_selection_ama$model.linear$cpval) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  mutate(term = term %>% 
           str_replace("\\.X", " * Political Risk")) %>% 
  filter(grepl("Group", term) == F)

binning_select_ama_bins <- rownames(binning_BIT_selection_ama$model.binning$coefficients) %>% 
  bind_cols(
    binning_BIT_selection_ama$model.binning$coefficients,
    binning_BIT_selection_ama$model.binning$cse,
    binning_BIT_selection_ama$model.binning$cpval
  ) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  filter(term %in% 
           c("D.Group.2.G.1", "D.Group.2.G.2",
             "D.Group.2.G.3"))

binning_select_ama <- bind_rows(binning_select_ama_coefs,
                                binning_select_ama_bins)

binning_select_ama <- binning_select_ama %>% 
  mutate(term = case_when(
    term == "political_risk_host" ~ "Host Political Risk",
    term == "home_host_BTT" ~ "Tax treaty (home-host)",
    term == "Div_Mean_direct" ~ "Withholding tax (Dividends)",
    term == "Int_Mean_direct" ~ "Withholding tax (Interest)",
    term == "Roy_Mean_direct" ~ "Withholding tax (Royalties)",
    term == "home_host_BTT * Political Risk" ~
      "Tax treaty (home-host) * Risk",
    term == "Div_Mean_direct * Political Risk" ~ 
      "Withholding tax (D) * Risk",
    term == "Int_Mean_direct * Political Risk" ~ 
      "Withholding tax (I) * Risk",
    term == "Roy_Mean_direct * Political Risk" ~
      "Withholding tax (R) * Risk",
    term == "D.Group.2.G.1" ~ "Home-Host BIT (low risk)",
    term == "D.Group.2.G.2" ~ "Home-Host BIT (medium risk)",
    term == "D.Group.2.G.3" ~ "Home-Host BIT (high risk)",
    term == "log_gdppc_host" ~ "GDP per cap (log, host)",
    term == "log_gdppc_host * Political Risk" ~ "GDP per cap * Risk",
    term == "v2x_api_VDEM_host" ~ "Polyarchy (host)",
    term == "v2x_api_VDEM_host * Political Risk" ~ "Polyarchy * Risk",
    T ~ term
  ))

# Joint table

bind_cols(binning_select, binning_select_ama) %>% 
  select(-5) %>% 
  xtable() %>% 
  print.xtable(include.rownames = F)


# bin medians:

binning_BIT_selection$est.bin$`1` %>% 
  as_tibble() %>% 
  select(1) %>% 
  unlist()

binning_BIT_selection_ama$est.bin$`1` %>% 
  as_tibble() %>% 
  select(1) %>% 
  unlist()

# N

binning_BIT_selection$model.binning$N

binning_BIT_selection_ama$model.binning$N

# —TABLE B.3: FULL MODEL RESULTS, CONDUIT LOCATION MODELS, ISDS SAMPLE ####

modelsummary(list(cl_isds_tax_only, cl_isds_bit_only,
                  cl_isds_controls_year, cl_isds_controls_full),
             output = "latex",
             stars = T,
             coef_map = c("conduit_CIT_lag10" = "Corporate income tax rate",
                          "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
                          "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
                          "Int_effective_Alt_lag10" = "Withholding tax (interest)",
                          "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
                          "conduit_host_BIT" = "BIT (w/host)",
                          "log(gdppc_lag10_conduit)" = "GDP per cap (conduit, log)",
                          "netherlands" = "NLD dummy"))

# —TABLE B.4: FULL MODEL RESULTS, CONDUIT LOCATION MODELS, SEC + AMADEUS SAMPLES ####


modelsummary(list(cl_sec_taxonly, cl_sec_bitonly,
                  cl_sec_controls, cl_sec_full,
                  cl_ama_taxonly, cl_ama_bitonly,
                  cl_ama_controls, cl_ama_full),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "BIT_lag_10" = "BIT (w/host)",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "potential_conduit_CIT" = "Corporate income tax rate",
                          "conduit_host_BIT" = "BIT (w/host)",
                          "log(gdppc_WDI_1997 + 1)" = "GDP per cap (conduit, log)",
                          "log(gdppc_potentialconduit + 1)" = "GDP per cap (conduit, log)",
                          "netherlands" = "NLD dummy"))

# —TABLE B.5: REGRESSION TABLE ACCOMPANYING FIGURE 6 ####

# CL (ISDS)

binning_isds_coefs <- rownames(binning_isds$model.linear$coefficients) %>% 
  bind_cols(binning_isds$model.linear$coefficients,
            binning_isds$model.linear$cse,
            binning_isds$model.linear$cpval) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  mutate(term = term %>% 
           str_replace("\\.X", " * Political Risk")) %>% 
  filter(grepl("Group", term) == F)

binning_isds_bins <- rownames(binning_isds$model.binning$coefficients) %>% 
  bind_cols(
    binning_isds$model.binning$coefficients,
    binning_isds$model.binning$cse,
    binning_isds$model.binning$cpval
  ) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  filter(term %in% 
           c("D.Group.2.G.1", "D.Group.2.G.2",
             "D.Group.2.G.3"))

binning_isds_fortab <- bind_rows(binning_isds_coefs,
                          binning_isds_bins)

binning_isds_fortab <- binning_isds_fortab %>% 
  mutate(term = case_when(
    term == "D.Group.2.G.1" ~ "BIT w/host (low risk)",
    term == "D.Group.2.G.2" ~ "BIT w/host (medium risk)",
    term == "D.Group.2.G.3" ~ "BIT w/host (high risk)",
    term == "dtt_conduit_parent_lag10" ~ "Tax treaty (w/home)",
    term == "dtt_host_conduit_lag10" ~ "Tax treaty (w/host)",
    term == "Div_effective_Alt_lag10" ~ "Withholding tax (dividends)",
    term == "Int_effective_Alt_lag10" ~ "Withholding tax (interest)",
    term == "conduit_CIT_lag10" ~ "Corporate income tax rate",
    term == "political_risk_lag10" ~ "Host Political Risk",
    term == "dtt_conduit_parent_lag10 * Political Risk" ~ 
      "Tax treaty (w/home) * Risk",
    term == "dtt_host_conduit_lag10 * Political Risk" ~
      "Tax treaty (w/host) * Risk",
    term == "Div_effective_Alt_lag10 * Political Risk" ~ 
      "With. tax (D) * Risk",
    term == "Int_effective_Alt_lag10 * Political Risk" ~ 
      "With. tax (I) * Risk",
    term == "conduit_CIT_lag10 * Political Risk" ~ 
      "Corp. tax rate * Risk",
    term == "log_gddppc_lag10_conduit" ~ "GDP per cap (conduit, log)",
    term == "log_gddppc_lag10_conduit * Political Risk" ~ "GDP per cap * Risk",
    term == "netherlands" ~ "NLD dummy",
    term == "netherlands * Political Risk" ~ "NLD * Risk",
    T ~ term
  )) 

# CL (SEC)

binning_sec_coefs <- rownames(binning_sec$model.linear$coefficients) %>% 
  bind_cols(binning_sec$model.linear$coefficients,
            binning_sec$model.linear$cse,
            binning_sec$model.linear$cpval) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  mutate(term = term %>% 
           str_replace("\\.X", " * Political Risk")) %>% 
  filter(grepl("Group", term) == F)

binning_sec_bins <- rownames(binning_sec$model.binning$coefficients) %>% 
  bind_cols(
    binning_sec$model.binning$coefficients,
    binning_sec$model.binning$cse,
    binning_sec$model.binning$cpval
  ) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  filter(term %in% 
           c("D.Group.2.G.1", "D.Group.2.G.2",
             "D.Group.2.G.3"))

binning_sec_fortab <- bind_rows(binning_sec_coefs,
                         binning_sec_bins)

binning_sec_fortab <- binning_sec_fortab %>% 
  mutate(term = case_when(
    term == "D.Group.2.G.1" ~ "BIT w/host (low risk)",
    term == "D.Group.2.G.2" ~ "BIT w/host (medium risk)",
    term == "D.Group.2.G.3" ~ "BIT w/host (high risk)",
    term == "BTT_lag_10_parentconduit" ~ "Tax treaty (w/home)",
    term == "BTT_lag_10_conduithost" ~ "Tax treaty (w/host)",
    term == "Div_effective" ~ "Withholding tax (dividends)",
    term == "Int_effective" ~ "Withholding tax (interest)",
    term == "CIT_lag_10" ~ "Corporate income tax rate",
    term == "political_risk_1997" ~ "Host Political Risk",
    term == "BTT_lag_10_parentconduit * Political Risk" ~ 
      "Tax treaty (w/home) * Risk",
    term == "BTT_lag_10_conduithost * Political Risk" ~
      "Tax treaty (w/host) * Risk",
    term == "Div_effective * Political Risk" ~ 
      "With. tax (D) * Risk",
    term == "Int_effective * Political Risk" ~ 
      "With. tax (I) * Risk",
    term == "CIT_lag_10 * Political Risk" ~ 
      "Corp. tax rate * Risk",
    term == "log_gdppc_10" ~ "GDP per cap (conduit, log)",
    term == "log_gdppc_10 * Political Risk" ~ "GDP per cap * Risk",
    term == "netherlands" ~ "NLD dummy",
    term == "netherlands * Political Risk" ~ "NLD * Risk",
    T ~ term
  ))

# CL (Amadeus)

binning_ama_coefs <- rownames(binning_ama$model.linear$coefficients) %>% 
  bind_cols(binning_ama$model.linear$coefficients,
            binning_ama$model.linear$cse,
            binning_ama$model.linear$cpval) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  mutate(term = term %>% 
           str_replace("\\.X", " * Political Risk")) %>% 
  filter(grepl("Group", term) == F)

binning_ama_bins <- rownames(binning_ama$model.binning$coefficients) %>% 
  bind_cols(
    binning_ama$model.binning$coefficients,
    binning_ama$model.binning$cse,
    binning_ama$model.binning$cpval
  ) %>% 
  rename(term = 1, 
         est = 2,
         se = 3,
         pval = 4) %>% 
  filter(term %in% 
           c("D.Group.2.G.1", "D.Group.2.G.2",
             "D.Group.2.G.3"))

binning_ama_fortab <- bind_rows(binning_ama_coefs,
                         binning_ama_bins)

binning_ama_fortab <- binning_ama_fortab %>% 
  mutate(term = case_when(
    term == "D.Group.2.G.1" ~ "BIT w/host (low risk)",
    term == "D.Group.2.G.2" ~ "BIT w/host (medium risk)",
    term == "D.Group.2.G.3" ~ "BIT w/host (high risk)",
    term == "parent_conduit_BTT" ~ "Tax treaty (w/home)",
    term == "conduit_host_BTT" ~ "Tax treaty (w/host)",
    term == "Div_effective" ~ "Withholding tax (dividends)",
    term == "Int_effective" ~ "Withholding tax (interest)",
    term == "potential_conduit_CIT" ~ "Corporate income tax rate",
    term == "political_risk_host" ~ "Host Political Risk",
    term == "parent_conduit_BTT * Political Risk" ~ 
      "Tax treaty (w/home) * Risk",
    term == "conduit_host_BTT * Political Risk" ~
      "Tax treaty (w/host) * Risk",
    term == "Div_effective * Political Risk" ~ 
      "With. tax (D) * Risk",
    term == "Int_effective * Political Risk" ~ 
      "With. tax (I) * Risk",
    term == "potential_conduit_CIT * Political Risk" ~ 
      "Corp. tax rate * Risk",
    term == "gdppc_potentialconduit_log" ~ "GDP per cap (conduit, log)",
    term == "gdppc_potentialconduit_log * Political Risk" ~ "GDP per cap * Risk",
    term == "netherlands" ~ "NLD dummy",
    term == "netherlands * Political Risk" ~ "NLD * Risk",
    T ~ term
  )) 

# joint table

bind_cols(binning_isds_fortab, binning_sec_fortab, binning_ama_fortab) %>%
  select(-5, -9) %>%
  xtable() %>% 
  print.xtable(include.rownames = F)

# bin medians:

binning_isds$est.bin$`1` %>% 
  as_tibble() %>% 
  select(1) %>% 
  unlist()

binning_sec$est.bin$`1` %>% 
  as_tibble() %>% 
  select(1) %>% 
  unlist()

binning_ama$est.bin$`1` %>% 
  as_tibble() %>% 
  select(1) %>% 
  unlist()

# N

binning_isds$model.binning$N

binning_sec$model.binning$N

binning_ama$model.binning$N


# —TABLE B.6: SELECTION MODELS, LOGISTIC REGRESSION ####


selection_bit_logit <- femlm(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                               Roy_Mean_hosthome + BIT_lag_10, family = "logit",
                             cluster = ~ top_parent_id_parent,
                             data = selection_sec)

selection_controls_logit <- femlm(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                                    Roy_Mean_hosthome + BIT_lag_10 + 
                                    log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                                    political_risk_1997, family = "logit",
                                  cluster = ~ top_parent_id_parent,
                                  data = selection_sec)

selection_full_logit <- femlm(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                                Roy_Mean_hosthome + BIT_lag_10 + 
                                log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                                political_risk_1997 | top_parent_id_parent,
                              family = "logit",
                              cluster = ~ top_parent_id_parent,
                              data = selection_sec)

selection_bit_ama_logit <- femlm(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                   Roy_Mean_direct + home_host_BIT,
                                 family = "logit",
                                 cluster = ~ GUO_NAME,
                                 data = selection_ama)

selection_controls_ama_logit <- femlm(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                        Roy_Mean_direct + home_host_BIT + 
                                        log_gdppc_host + v2x_api_VDEM_host +
                                        political_risk_host,
                                      family = "logit",
                                      cluster = ~ GUO_NAME,
                                      data = selection_ama)

selection_full_ama_logit <- femlm(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                    Roy_Mean_direct + home_host_BIT + 
                                    log_gdppc_host + v2x_api_VDEM_host +
                                    political_risk_host | GUO_NAME + DATEINC_year,
                                  family = "logit",
                                  cluster = ~ GUO_NAME,
                                  data = selection_ama)

modelsummary(models = list(selection_bit_logit,
                           selection_controls_logit,
                           selection_full_logit,
                           selection_bit_ama_logit,
                           selection_controls_ama_logit,
                           selection_full_ama_logit),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_hosthome" = "Tax Treaty (home-host)",
                          "home_host_BTT" = "Tax Treaty (home-host)",
                          "Div_Mean_hosthome" = "Withholding tax (Dividends)",
                          "Div_Mean_direct" = "Withholding tax (Dividends)",
                          "Int_Mean_hosthome" = "Withholding tax (Interest)",
                          "Int_Mean_direct" = "Withholding tax (Interest)",
                          "Roy_Mean_hosthome" = "Withholding tax (Royalties)",
                          "Roy_Mean_direct" = "Withholding tax (Royalties)",
                          "BIT_lag_10" = "BIT (home-host)",
                          "home_host_BIT" = "BIT (home-host)"))

# —TABLE B.7: CONDUIT LOCATION MODELS, ISDS SAMPLE, LOGISTIC REGRESSION ####


cl_all_tax_only_logit <- femlm(chosen ~ conduit_CIT_lag10 +
                                 dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                 Int_effective_Alt_lag10 + Div_effective_Alt_lag10 + netherlands,
                               family = "logit",
                               data = cl_isds,
                               cluster = ~ case)

cl_all_bit_only_logit <- femlm(chosen ~ conduit_host_BIT + netherlands,
                               family = "logit",
                               data = cl_isds,
                               cluster = ~ case)

cl_all_controls_year_logit <- femlm(chosen ~ conduit_CIT_lag10 +
                                      dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                      Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                                      conduit_host_BIT +
                                      log(gdppc_lag10_conduit) + netherlands | year,
                                    family = "logit",
                                    cluster = ~ case,
                                    data = cl_isds)

cl_all_controls_full_logit <- femlm(chosen ~ conduit_CIT_lag10 +
                                      dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                      Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                                      conduit_host_BIT +  log(gdppc_lag10_conduit) +
                                      netherlands | year + case,
                                    family = "logit",
                                    cluster = ~ case,
                                    data = cl_isds,
)

modelsummary(list(cl_all_tax_only_logit, cl_all_bit_only_logit,
                  cl_all_controls_year_logit, cl_all_controls_full_logit),
             output = "latex",
             stars = T,
             coef_map = c("conduit_CIT_lag10" = "Corporate income tax rate",
                          "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
                          "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
                          "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
                          "Int_effective_Alt_lag10" = "Withholding tax (interest)",
                          "conduit_host_BIT" = "BIT (w/host)"))

# —TABLE B.8: CONDUIT LOCATION MODELS, SEC + AMADEUS SAMPLES, LOGISTIC REGRESSION ####

cl_sec_taxonly_logit <- femlm(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective,
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_bitonly_logit <- femlm(
  chosen ~ BIT_lag_10,
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_controls_logit <- femlm(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1),
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_full_logit <- femlm(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

# Amadeus

cl_ama_taxonly_logit <- femlm(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_bitonly_logit <- femlm(
  chosen ~ conduit_host_BIT,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_controls_logit <- femlm(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands |
    DATEINC_year,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_full_logit <- femlm(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

# Combined reg table:

modelsummary(list(cl_sec_taxonly_logit, cl_sec_bitonly_logit,
                  cl_sec_controls_logit, cl_sec_full_logit,
                  cl_ama_taxonly_logit, cl_ama_bitonly_logit,
                  cl_ama_controls_logit, cl_ama_full_logit),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "BIT_lag_10" = "BIT (w/host)",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "potential_conduit_CIT" = "Corporate income tax rate",
                          "conduit_host_BIT" = "BIT (w/host)"))

# —FIGURE B.1: CONDUIT LOCATION MODELS, SEC SAMPLE, BY INDUSTRY ####

run_regs_industry <- function(ind) {
  feols(
    chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
      BTT_lag_10_parentconduit + Div_effective + Int_effective +
      BIT_lag_10 + log(gdppc_WDI_1997+1)| top_parent_id,
    cluster = ~ top_parent_id + id, data = cl_sec %>% 
      filter(industry == ind)
  ) %>% 
    broom::tidy() %>% 
    mutate(industry_name = ind) %>% 
    return()
}

# Create vector of industries (leave out wholesale trade, only 1 firm):

ind_vector <- cl_sec %>% 
  select(ind = industry) %>% 
  filter(ind != "Wholesale trade") %>% 
  unique()

# Iterate:

ind_reg_df <- pmap_dfr(ind_vector, run_regs_industry)

# Plot:

ind_disag_plot <- ind_reg_df %>% 
  mutate(term = case_when(
    term == "BTT_lag_10_parentconduit" ~ "Tax treaty (w/home)",
    term == "BTT_lag_10_conduithost" ~ "Tax treaty (w/host)",
    term == "Div_effective" ~ "Withholding tax\n (dividends)",
    term == "Int_effective" ~ "Withholding tax\n (interest)",
    term == "CIT_lag_10" ~ "Corporate income\n tax rate",
    term == "BIT_lag_10" ~ "BIT (w/host)",
    T ~ "remove"
  ),
  term = factor(term,
                levels = c("BIT (w/host)",
                           "Corporate income\n tax rate",
                           "Withholding tax\n (interest)",
                           "Withholding tax\n (dividends)",
                           "Tax treaty (w/host)",
                           "Tax treaty (w/home)"
                )),
  industry_name = case_when(
    industry_name == "Finance/Insurance/Real estate" ~ "Finance/Insurance/\nReal estate", 
    industry_name == "Transportation/Utilities" ~ "Transport/Utils",
    T ~ industry_name)) %>% 
  filter(term != "remove") %>% 
  mutate(estimate = if_else(grepl("Withholding", term) == T |
                              grepl("rate", term) == T, #rescale tax vars
                            estimate/10, estimate),
         std.error = if_else(grepl("Withholding", term) == T |
                               grepl("rate", term) == T, #rescale WHT vars
                             std.error/10, std.error)) %>% 
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2) + 
  geom_linerange(aes(ymin = estimate - 1.96*std.error,
                     ymax = estimate + 1.96*std.error)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .6) +
  facet_wrap(facets = "industry_name") +
  coord_flip() + 
  theme_bw() +
  theme(axis.text = element_text(size = 12),
        strip.text = element_text(size = 12)) +
  labs(x = "", y = "")

ggsave("ind_disag_plot.jpg", ind_disag_plot,
       width = 6.5, height = 8, units = "in")

# —FIGURE B.2: CONDUIT LOCATION MODELS, AMADEUS SAMPLE, BY INDUSTRY ####


run_regs_industry_ama <- function(ind) {
  feols(
    chosen ~ potential_conduit_CIT + conduit_host_BTT +
      parent_conduit_BTT + Div_effective + Int_effective +
      conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands |
      GUO_NAME + DATEINC_year,
    cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama %>% 
      filter(industry_parent == ind)
  ) %>% 
    broom::tidy() %>% 
    mutate(industry_name = ind) %>% 
    return()
}

# Create vector of industries (limit to industries with 10k+ obs):

ind_vector_ama <- cl_ama %>% 
  group_by(ind = industry_parent) %>% 
  summarize(n = n()) %>% 
  filter(is.na(ind) == F,
         n > 10000) %>% 
  select(-n)

# Iterate:

ind_reg_df_ama <- pmap_dfr(ind_vector_ama, run_regs_industry_ama)

# Plot:

ind_disag_plot_ama <- ind_reg_df_ama %>% 
  mutate(term = case_when(
    term == "parent_conduit_BTT" ~ "Tax treaty (w/home)",
    term == "conduit_host_BTT" ~ "Tax treaty (w/host)",
    term == "Div_effective" ~ "Withholding tax (dividends)",
    term == "Int_effective" ~ "Withholding tax (interest)",
    term == "potential_conduit_CIT" ~ "Corporate income tax rate",
    term == "conduit_host_BIT" ~ "BIT (w/host)",
    T ~ "remove"
  ),
  term = factor(term,
                levels = c("BIT (w/host)",
                           "Corporate income tax rate",
                           "Withholding tax (interest)",
                           "Withholding tax (dividends)",
                           "Tax treaty (w/host)",
                           "Tax treaty (w/home)"
                ))) %>% 
  filter(term != "remove") %>% 
  mutate(estimate = if_else(grepl("Withholding", term) == T |
                              grepl("rate", term) == T, #rescale tax vars
                            estimate/10, estimate),
         std.error = if_else(grepl("Withholding", term) == T |
                               grepl("rate", term) == T, #rescale WHT vars
                             std.error/10, std.error)) %>% 
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2) + 
  geom_linerange(aes(ymin = estimate - 1.96*std.error,
                     ymax = estimate + 1.96*std.error)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .6) +
  facet_wrap(facets = "industry_name") +
  coord_flip() + 
  scale_y_continuous(breaks = c(-0.01, 0.0, 0.01)) +
  theme_bw() +
  theme(axis.text = element_text(size = 12),
        strip.text = element_text(size = 12)) +
  labs(x = "", y = "")

ggsave("ind_disag_plot_ama.jpg", ind_disag_plot_ama,
       width = 8, height = 8, units = "in")

# —TABLE B.9: CONDUIT LOCATION MODELS, ALL SAMPLES, DROPPING BIT VARIABLE ####


isds_cl_nobit <- feols(chosen ~ conduit_CIT_lag10 +
                         dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                         Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                         log(gdppc_lag10_conduit) +
                         netherlands | year + case,
                       cluster = ~case,
                       data = cl_isds,
)

sec_cl_nobit <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

ama_cl_nobit <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

# ——table 

modelsummary(models = list(cl_isds_controls_full, isds_cl_nobit,
                           cl_sec_full, sec_cl_nobit,
                           cl_ama_full, ama_cl_nobit),
             stars = T,
             output = "latex",
             coef_map = c(
               "conduit_CIT_lag10" = "Corporate income tax rate",
               "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
               "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
               "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
               "Int_effective_Alt_lag10" = "Withholding tax (interest)",
               "conduit_host_BIT" = "BIT (w/host)",
               "BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
               "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
               "Div_effective" = "Withholding tax (dividends)",
               "Int_effective" = "Withholding tax (interest)",
               "BIT_lag_10" = "BIT (w/host)",
               "CIT_lag_10" = "Corporate income tax rate",
               "potential_conduit_CIT" = "Corporate income tax rate",
               "conduit_host_BTT" = "Tax treaty (w/host)",
               "parent_conduit_BTT" = "Tax treaty (w/home)"
             )
)

# —TABLE B.10: SELECTION MODELS, INCLUDING BIT x BTT INTERACTION TERM ####


selection_full_int <- feols(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                              Roy_Mean_hosthome + BIT_lag_10 + BIT_lag_10*BTT_lag_10_hosthome +
                              log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                              political_risk_1997 | top_parent_id_parent, cluster = ~ top_parent_id_parent,
                            data = selection_sec)

selection_full_int_logit <- femlm(indirect ~ BTT_lag_10_hosthome + Div_Mean_hosthome + Int_Mean_hosthome +
                                    Roy_Mean_hosthome + BIT_lag_10 + BIT_lag_10*BTT_lag_10_hosthome +
                                    log(gdppc_WDI_1997+1) + v2x_api_VDEM_1997 +
                                    political_risk_1997 | top_parent_id_parent,
                                  family = "logit",
                                  cluster = ~ top_parent_id_parent,
                                  data = selection_sec)

selection_full_ama_int <- feols(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                  Roy_Mean_direct + home_host_BIT + home_host_BTT*home_host_BIT +
                                  log_gdppc_host + v2x_api_VDEM_host +
                                  political_risk_host | GUO_NAME + DATEINC_year,
                                cluster = ~ GUO_NAME,
                                data = selection_ama)

selection_full_ama_int_logit <- femlm(indirect_dummy ~ home_host_BTT + Div_Mean_direct + Int_Mean_direct +
                                        Roy_Mean_direct + home_host_BIT + home_host_BTT*home_host_BIT +
                                        log_gdppc_host + v2x_api_VDEM_host +
                                        political_risk_host | GUO_NAME + DATEINC_year,
                                      family = "logit",
                                      cluster = ~ GUO_NAME,
                                      data = selection_ama)

modelsummary(models = list(selection_full_int, selection_full_int_logit,
                           selection_full_ama_int,
                           selection_full_ama_int_logit),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_hosthome" = "Tax Treaty (home-host)",
                          "home_host_BTT" = "Tax Treaty (home-host)",
                          "Div_Mean_hosthome" = "Withholding tax (Dividends)",
                          "Div_Mean_direct" = "Withholding tax (Dividends)",
                          "Int_Mean_hosthome" = "Withholding tax (Interest)",
                          "Int_Mean_direct" = "Withholding tax (Interest)",
                          "Roy_Mean_hosthome" = "Withholding tax (Royalties)",
                          "Roy_Mean_direct" = "Withholding tax (Royalties)",
                          "BIT_lag_10" = "BIT (home-host)",
                          "home_host_BIT" = "BIT (home-host)",
                          "BTT_lag_10_hosthome:BIT_lag_10" = "Tax Treaty * BIT",
                          "home_host_BTT:home_host_BIT" = "Tax Treaty * BIT"))

# —TABLE B.11: CONDUIT LOCATION MODELS, INCLUDING BIT x BTT INTERACTION TERM, OLS + LOGIT ####


cl_all_controls_int <- feols(chosen ~ conduit_CIT_lag10 +
                               dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                               Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                               conduit_host_BIT +  log(gdppc_lag10_conduit) +
                               netherlands  + dtt_conduit_parent_lag10*conduit_host_BIT +
                               dtt_host_conduit_lag10*conduit_host_BIT | year + case,
                             cluster = ~ case,
                             data = cl_isds,
)

cl_all_controls_int_logit <- femlm(chosen ~ conduit_CIT_lag10 +
                                     dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                                     Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                                     conduit_host_BIT +  log(gdppc_lag10_conduit) +
                                     netherlands  + dtt_conduit_parent_lag10*conduit_host_BIT +
                                     dtt_host_conduit_lag10*conduit_host_BIT | year + case,
                                   family = "logit",
                                   cluster = ~ case,
                                   data = cl_isds,
)

# Conduit location, SEC:

cl_sec_full_int <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands +
    BTT_lag_10_conduithost*BIT_lag_10 + 
    BTT_lag_10_parentconduit*BIT_lag_10 | top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_full_int_logit <- femlm(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands +
    BTT_lag_10_conduithost*BIT_lag_10 + 
    BTT_lag_10_parentconduit*BIT_lag_10 | top_parent_id,
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

# Conduit location, Amadeus:

cl_ama_full_int <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands +
    conduit_host_BIT*conduit_host_BTT +
    conduit_host_BIT*parent_conduit_BTT|
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_full_int_logit <- femlm(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands +
    conduit_host_BIT*conduit_host_BTT +
    conduit_host_BIT*parent_conduit_BTT|
    GUO_NAME + DATEINC_year,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

# table

modelsummary(list(cl_all_controls_int,
                  cl_all_controls_int_logit,
                  cl_sec_full_int,
                  cl_sec_full_int_logit,
                  cl_ama_full_int,
                  cl_ama_full_int_logit),
             output = "latex",
             stars = T,
             coef_map = c("conduit_CIT_lag10" = "Corporate income tax rate",
                          "potential_conduit_CIT" = "Corporate income tax rate",
                          "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
                          "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
                          "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
                          "Int_effective_Alt_lag10" = "Withholding tax (interest)",
                          "conduit_host_BIT" = "BIT (w/host)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "BIT_lag_10" = "BIT (w/host)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "BTT_lag_10_parentconduit:BIT_lag_10" = "Tax treaty (w/home) * BIT",
                          "dtt_conduit_parent_lag10:conduit_host_BIT" = "Tax treaty (w/home) * BIT",
                          "parent_conduit_BTT:conduit_host_BIT" = "Tax treaty (w/home) * BIT",
                          "BTT_lag_10_conduithost:BIT_lag_10" = "Tax treaty (w/host) * BIT",
                          "dtt_host_conduit_lag10:conduit_host_BIT" = "Tax treaty (w/host) * BIT",
                          "conduit_host_BTT:conduit_host_BIT" = "Tax treaty (w/host) * BIT"
             ))

# —TABLE B.12: CONDUIT LOCATION MODELS, SEC + AMADEUS SAMPLES, HOME-HOST BIT VAR + INTERACTIONS ####


cl_sec_full_homehostbit <- feols(
  chosen ~ CIT_lag_10*BIT_lag_10_homehost + BTT_lag_10_conduithost*BIT_lag_10_homehost +
    BTT_lag_10_parentconduit*BIT_lag_10_homehost + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands +
    BIT_lag_10_homehost | top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_full_homehostbit_logit <- femlm(
  chosen ~ CIT_lag_10*BIT_lag_10_homehost + BTT_lag_10_conduithost*BIT_lag_10_homehost +
    BTT_lag_10_parentconduit*BIT_lag_10_homehost + Div_effective + Int_effective +
    BIT_lag_10 + log(gdppc_WDI_1997+1) + netherlands +
    BIT_lag_10_homehost | top_parent_id,
  family = "logit",
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_ama_full_homehostbit <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands +
    home_host_BIT*conduit_host_BTT +
    home_host_BIT*parent_conduit_BTT +
    home_host_BIT*potential_conduit_CIT|
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_full_homehostbit_logit <- femlm(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    conduit_host_BIT + log(gdppc_potentialconduit+1) + netherlands +
    home_host_BIT*conduit_host_BTT +
    home_host_BIT*parent_conduit_BTT +
    home_host_BIT*potential_conduit_CIT|
    GUO_NAME + DATEINC_year,
  family = "logit",
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

# Table

modelsummary(models = list(cl_sec_full_homehostbit,
                           cl_sec_full_homehostbit_logit,
                           cl_ama_full_homehostbit,
                           cl_ama_full_homehostbit_logit),
             output = "latex",
             stars = T,
             coef_map = c("BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "potential_conduit_CIT" = "Corporate income tax rate",
                          "BIT_lag_10" = "BIT (conduit-host)",
                          "conduit_host_BIT" = "BIT (conduit-host)",
                          "BIT_lag_10_homehost" = "BIT (home-host)",
                          "home_host_BIT" = "BIT (home-host)",
                          "BIT_lag_10_homehost:BTT_lag_10_parentconduit" = "Tax treaty (home-conduit) * BIT (home-host)",
                          "BIT_lag_10_homehost:BTT_lag_10_conduithost" = "Tax treaty (conduit-host) * BIT (home-host)",
                          "CIT_lag_10:BIT_lag_10_homehost" = "Corp tax rate * BIT (home-host)",
                          "parent_conduit_BTT:home_host_BIT" = "Tax treaty (home-conduit) * BIT (home-host)",
                          "conduit_host_BTT:home_host_BIT" = "Tax treaty (conduit-host) * BIT (home-host)",
                          "potential_conduit_CIT:home_host_BIT" = "Corp tax rate * BIT (home-host)"))

# —TABLE B.13: CONDUIT LOCATION MODELS, ACCOUNTING FOR BIT HETEROGENEITY ####


cl_isds_biz <- feols(chosen ~ conduit_CIT_lag10 +
                       dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                       Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                       biz_bit + nonbiz_bit + log(gdppc_lag10_conduit) +
                       netherlands | year + case,
                     cluster = ~ case,
                     data = cl_isds
)

cl_isds_dob <- feols(chosen ~ conduit_CIT_lag10 +
                       dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                       Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                       dob_bit + nondob_bit + log(gdppc_lag10_conduit) +
                       netherlands | year + case,
                     cluster = ~ case,
                     data = cl_isds
)

cl_sec_biz <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    biz_bit + nonbiz_bit + log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_sec_dob <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    dob_bit + nondob_bit + log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_ama_biz <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    biz_bit + nonbiz_bit + log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
)

cl_ama_dob <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    dob_bit + nondob_bit + log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
) 

cl_isds_dobbiz <- feols(chosen ~ conduit_CIT_lag10 +
                          dtt_conduit_parent_lag10 + dtt_host_conduit_lag10 +
                          Int_effective_Alt_lag10 + Div_effective_Alt_lag10 +
                          dob_or_biz_bit + non_dob_or_biz_bit + log(gdppc_lag10_conduit) +
                          netherlands | year + case,
                        cluster = ~ case,
                        data = cl_isds,
)

cl_sec_dobbiz <- feols(
  chosen ~ CIT_lag_10 + BTT_lag_10_conduithost +
    BTT_lag_10_parentconduit + Div_effective + Int_effective +
    dob_or_biz_bit + non_dob_or_biz_bit + log(gdppc_WDI_1997+1) + netherlands| top_parent_id,
  cluster = ~ top_parent_id + id, data = cl_sec
)

cl_ama_dobbiz <- feols(
  chosen ~ potential_conduit_CIT + conduit_host_BTT +
    parent_conduit_BTT + Div_effective + Int_effective +
    dob_or_biz_bit + non_dob_or_biz_bit + log(gdppc_potentialconduit+1) + netherlands |
    GUO_NAME + DATEINC_year,
  cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama
) 

# Reg table:

modelsummary(models = list(cl_isds_biz, cl_isds_dob, cl_isds_dobbiz,
                           cl_sec_biz, cl_sec_dob, cl_sec_dobbiz,
                           cl_ama_biz, cl_ama_dob, cl_ama_dobbiz),
             output = "latex",
             stars = T,
             coef_map = c("conduit_CIT_lag10" = "Corporate income tax rate",
                          "dtt_conduit_parent_lag10" = "Tax treaty (w/home)",
                          "dtt_host_conduit_lag10" = "Tax treaty (w/host)",
                          "Div_effective_Alt_lag10" = "Withholding tax (dividends)",
                          "Int_effective_Alt_lag10" = "Withholding tax (interest)",
                          "biz_bit" = "BIT (w/sub. biz. req)",
                          "nonbiz_bit" = "BIT (w/o sub. biz. req)",
                          "dob_bit" = "BIT (w/DOB clause)",
                          "nondob_bit" = "BIT (w/o DOB clause)",
                          "dob_or_biz_bit" = "BIT (either)",
                          "non_dob_or_biz_bit" = "BIT (neither)",
                          "BTT_lag_10_parentconduit" = "Tax treaty (w/home)",
                          "BTT_lag_10_conduithost" = "Tax treaty (w/host)",
                          "Div_effective" = "Withholding tax (dividends)",
                          "Int_effective" = "Withholding tax (interest)",
                          "CIT_lag_10" = "Corporate income tax rate",
                          "parent_conduit_BTT" = "Tax treaty (w/home)",
                          "conduit_host_BTT" = "Tax treaty (w/host)",
                          "potential_conduit_CIT" = "Corporate income tax rate"
             ))


# —FIGURE B.3: CONDUIT LOCATION PREDICTORS, AMADEUS SAMPLE, DISAGGREGATED BY YEAR OF INCORPORATION ####


run_regs_overtime <- function(start, end) {
  feols(
    chosen ~ potential_conduit_CIT + conduit_host_BTT +
      parent_conduit_BTT + Div_effective + Int_effective +
      conduit_host_BIT + log(gdppc_potentialconduit+1) +
      netherlands | GUO_NAME + DATEINC_year,
    cluster = ~ GUO_NAME + SUBSIDIARY_NAME, data = cl_ama %>% 
      filter(DATEINC_year %in% c(start:end))
  ) %>% 
    broom::tidy() %>% 
    mutate(time_period = paste0(start, "-",
                                end)) %>% 
    return()
}

# Generate df of four-year ranges:

years_df <- tibble(
  start = seq(1980, 2004, 4),
  end = seq(1983, 2007, 4)
)

# iterate

years_reg_df <- pmap_df(years_df, run_regs_overtime)

# filter and plot

years_reg_df_plot <- years_reg_df %>% 
  filter(term %in% c("conduit_host_BIT",
                     "conduit_host_BTT",
                     "Div_effective")) %>% 
  mutate(term = case_when(
    term == "conduit_host_BIT" ~ "BIT (w/host)",
    term == "conduit_host_BTT" ~ "Tax treaty (w/host)",
    term == "Div_effective" ~ "With. Tax Rate (Dividends)"
  )) %>% 
  ggplot(aes(x = time_period,
             y = estimate,
             color = term)) +
  geom_point(size = 3, position = position_dodge(width = .5)) +
  geom_linerange(aes(ymax = estimate + (1.96*std.error),
                     ymin = estimate - (1.96*std.error)),
                 position = position_dodge(width = .5)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .75) +
  theme_bw() +
  labs(x = "Date of Subsidiary Incorporation",
       y = "Coefficient Estimate",
       color = "Variable") +
  theme(legend.position = "bottom",
        axis.title = element_text(size = 16),
        axis.text = element_text(size = 12),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14))

ggsave("years_reg_df_plot.jpg", years_reg_df_plot,
       width = 8, height = 4.5, units = "in")
